# coding: utf-8


import os
import numpy as np
from PIL import Image
import binascii
import time
import math
from tqdm import tqdm
from multiprocessing import Pool


# Input directories that hold the executables to convert.
NORMAL_EXE_PATH = '../software/normal'
MALWARE_EXE_PATH = '../software/malware'
# Output directories that receive the generated PNG images.
NORMAL_IMG_PATH = '../imgs/normal_img'
MALWARE_IMG_PATH = '../imgs/malware_img'


# Create the output folders if they do not exist yet.
# exist_ok=True replaces the previous "check, then create" sequence, which
# could raise FileExistsError when this module is imported by several
# processes at nearly the same time (e.g. multiprocessing workers that
# re-import the main module).
os.makedirs(NORMAL_IMG_PATH, exist_ok=True)
os.makedirs(MALWARE_IMG_PATH, exist_ok=True)


def getMatrixfrom_bin(filename, width):
    """Read a binary file and return its bytes as a 2-D uint8 matrix.

    Args:
        filename: Path of the file to read.
        width: Number of bytes per matrix row.

    Returns:
        A writable ``numpy.uint8`` array of shape
        ``(file_length // width, width)``. Trailing bytes that do not fill a
        complete row are dropped, so a file shorter than ``width`` bytes
        yields an array with zero rows.
    """
    with open(filename, 'rb') as f:
        content = f.read()
    rows = len(content) // width
    # View the raw bytes directly as uint8 values instead of hex-encoding the
    # file and parsing it back two characters at a time in a Python loop.
    data = np.frombuffer(content, dtype=np.uint8)[:rows * width]
    # frombuffer returns a read-only view onto `content`; copy so the caller
    # gets an independent, writable matrix.
    return data.reshape((rows, width)).copy()


def exe_to_img_task(file, exe_path, img_path, width=512):
    """Convert one executable into a PNG image of its raw bytes.

    Args:
        file: Name of the executable inside ``exe_path``.
        exe_path: Directory containing the executable.
        img_path: Directory where ``<file>.png`` is written.
        width: Image width in pixels, i.e. bytes per image row.

    Returns:
        None. The image is written to disk as a side effect. Nothing is
        written when the image already exists or when the executable holds
        fewer than ``width`` bytes.
    """
    exe_file = os.path.join(exe_path, file)
    img_file = os.path.join(img_path, file + '.png')
    print(img_file)
    # Skip executables whose image was already generated by an earlier run.
    if os.path.exists(img_file):
        return
    # Only complete rows are kept, so a file shorter than one row (which
    # includes empty files) would produce a zero-row matrix with nothing to
    # draw. Skip it instead of handing an empty matrix to the image library.
    if os.path.getsize(exe_file) < width:
        return
    fh = getMatrixfrom_bin(exe_file, width)
    im = Image.fromarray(fh)  # convert the byte matrix into an image
    im.save(img_file)


def get_img_files(exe_path, img_path):
    """Convert every executable in ``exe_path`` into an image in ``img_path``.

    File names listed (one per line) in ``../exe_rm.txt`` are excluded. The
    remaining files are converted in parallel by a pool of 11 worker
    processes, each running ``exe_to_img_task``.

    Args:
        exe_path: Directory containing the executables.
        img_path: Directory that receives the generated images.

    Raises:
        OSError: If ``exe_path`` cannot be listed or ``../exe_rm.txt`` cannot
            be opened.
    """
    files = os.listdir(exe_path)
    print(len(files))

    # Names of executables that must not be converted.
    with open('../exe_rm.txt', 'r') as f:
        exe_rm_set = {line.strip('\n') for line in f}

    files = [file for file in files if file not in exe_rm_set]
    print(len(files))

    # The context manager guarantees the workers are torn down even if
    # submitting a task fails part-way through.
    with Pool(11) as p:
        pending = [
            (file, p.apply_async(exe_to_img_task, args=(file, exe_path, img_path)))
            for file in files
        ]
        p.close()
        p.join()

    # apply_async stores a worker's exception in its result object; without
    # calling get() every failure would disappear silently. Report each failed
    # file but keep going so one bad sample does not abort the whole batch.
    for file, result in pending:
        try:
            result.get()
        except Exception as exc:
            print('failed to convert {}: {!r}'.format(file, exc))


if __name__ == '__main__':
    # Process the normal samples first, then the malware samples; each pair is
    # (directory of executables, directory for the generated images).
    for src_dir, dst_dir in (
        (NORMAL_EXE_PATH, NORMAL_IMG_PATH),
        (MALWARE_EXE_PATH, MALWARE_IMG_PATH),
    ):
        get_img_files(src_dir, dst_dir)
